library(tidyverse)
library(plotly)
# Load the cleaned Gapminder table, discard its first column, and give the two
# long indicator columns the short names used by the rest of the script.
# NOTE(review): the first column is presumably a row index written when the
# CSV was exported -- confirm before relying on positional selection.
data <- read_csv('./gapminder_clean.csv') %>%
  select(-1) %>%
  rename(
    co2em = `CO2 emissions (metric tons per capita)`,
    popden = `Population density (people per sq. km of land area)`
  )
# GDP per capita and CO2 emissions per capita for 1962, keeping only the rows
# where both values are present.
data1962 <- data %>%
  filter(Year == 1962) %>%
  drop_na(gdpPercap, co2em) %>%
  select(gdpPercap, co2em)
# Scatter plot of CO2 emissions per capita against GDP per capita for 1962.
ggplot(data1962, aes(x = gdpPercap, y = co2em)) +
  geom_point() +
  xlab("GDP per capita") +
  ylab("CO2 emissions per capita (metric tons)")
# Pearson correlation test (the cor.test() default) between GDP per capita and
# CO2 emissions per capita for 1962. The result is not assigned to a variable.
# The argument expressions are echoed verbatim in the "data:" line of the
# printed result, so rewording them changes the output.
cor.test(data1962 %>% pull(gdpPercap), data1962 %>% pull(co2em))
##
## Pearson's product-moment correlation
##
## data: data1962 %>% pull(gdpPercap) and data1962 %>% pull(co2em)
## t = 25.269, df = 106, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8934697 0.9489792
## sample estimates:
## cor
## 0.9260817
# Correlation between GDP per capita and CO2 emissions per capita, computed
# separately for every year. Rows with a missing value in any of the three
# columns are dropped before the correlation is taken.
corrs <- data %>%
  select(Year, gdpPercap, co2em) %>%
  drop_na() %>%
  group_by(Year) %>%
  summarise(correlation = cor(gdpPercap, co2em))

# Pick the single row holding the largest correlation so that `maxi$Year` and
# `maxi$correlation` describe the same year. Taking the maximum of each column
# independently (e.g. lapply(corrs, max)) would pair the top correlation with
# the latest year in the data, whichever year it actually occurred in.
# which.max() skips NA correlations and returns the first maximum on ties.
# as.list() keeps `maxi` a plain named list with `$Year` and `$correlation`.
maxi <- as.list(corrs[which.max(corrs$correlation), ])
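The strongest correlation is 0.9387918 in the year 2007. Note that this pairing is only valid if both values come from the same row of `corrs`; the maximum of the `Year` column taken on its own is simply the last year in the data.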
# Rows for the year stored in `maxi$Year`, reduced to the columns needed for
# the bubble chart; any row with a missing value in those columns is removed.
max_em_year_data <- drop_na(
  select(
    filter(data, Year == maxi$Year),
    gdpPercap, co2em, pop, continent, `Country Name`
  )
)
# Bubble chart for the year stored in `maxi$Year`: point size encodes
# population, colour encodes continent.
fig <- ggplot(max_em_year_data) +
  geom_point(
    mapping = aes(
      x = gdpPercap,
      y = co2em,
      size = pop,
      color = continent,
      # `text` only feeds the plotly tooltip requested below.
      text = paste("Country: ", `Country Name`,
                   "\nGDP: ", gdpPercap,
                   "\nCO2 emissions: ", co2em)
    )
  ) +
  labs(
    x = "GDP per capita",
    y = "CO2 emissions per capita (metric tons)",
    title = str_glue("GDP vs CO2 emissions per capita in ", maxi$Year)
  )

# Convert to an interactive plotly widget whose hover tooltip shows only the
# custom `text` aesthetic.
ggplotly(fig, tooltip = "text")
# Average population density per country over all available years (missing
# values ignored), sorted from the densest country downwards.
data_popden <- arrange(
  summarise(
    group_by(data, `Country Name`),
    avg_popden = mean(popden, na.rm = TRUE)
  ),
  desc(avg_popden)
)
# Horizontal bar chart of the countries with the highest average population
# density. `data_popden` is sorted in descending order, so head() keeps the
# top entries; reorder() puts the densest country at the top of the axis.
num_countries_shown <- 20
ggplot(
  head(data_popden, n = num_countries_shown),
  aes(x = avg_popden, y = reorder(`Country Name`, avg_popden))
) +
  geom_col() +
  labs(
    x = "Average population density (people per sq. km of land)",
    y = "",
    title = str_glue(
      num_countries_shown,
      " most population dense countries 1962-2007"
    )
  )
The country with the highest average population density between 1962 and 2007 is Macao SAR, China.